home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power Programmierung
/
Power-Programmierung CD 2 (Tewi)(1994).iso
/
doc
/
mir
/
a_pattrn.c
< prev
next >
Wrap
Text File
|
1992-07-02
|
13KB
|
374 lines
/*
* usage: a_pattrn file_name key [ /x ] [ bytes_before ] > report
* "/x" = include hex, show only 16 bytes instead of 40
* A_PATTRN List every occurrence of a key character or string in a file.
* Show 3 (or "bytes_before", range 0 to 15) bytes prior to the
* key each time. Normally show a total of 40 bytes each time
* the key is found; if the "/x" argument is set, show only 16
* bytes, but in hex and ASCII both. The key may be from 1 to
* 16 characters. Within the key, any non-printing characters,
* characters which may confuse DOS (> or < or |), linefeeds,
* blanks, backslash, etc. must be shown in hex form... a
* backslash and 2 hex digits. Examples:
* a_pattrn herfile \8E > herfile.8e
* a_pattrn yourfile * 7 > yourfile.ast
* a_pattrn myfile Mother
* a_pattrn hisfile \94\05ke\ff 0 > 5char.pat
*
* input: Any file whatsoever.
*
* output: One line for each occurrence of the target byte(s) in the file.
* Sort the result to make patterns show up more clearly.
*
* writeup: MIR TUTORIAL ONE, topic 5
*
* Written: Douglas Lowry Jan 07 92
* Modified: Douglas Lowry Apr 03 92 Modify arguments
* Copyright (C) 1992 Marpex Inc.
*
* The MIR (Mass Indexing and Retrieval) Tutorials explain detailed
* usage and co-ordination of the MIR family of programs to analyze,
* prepare and index databases (small through gigabyte size), and
* how to build integrated retrieval software around the MIR search
* engine. The fifth of the five MIR tutorial series explains how
* to extend indexing capability into leading edge search-related
* technologies. For more information, GO IBMPRO on CompuServe;
* MIR files are in the DBMS library. The same files are on the
* Canada Remote Systems BBS. A diskette copy of the Introduction
* is available by mail ($10 US... check, Visa or Mastercard);
* diskettes with Introduction, Tutorial ONE software and the
* shareware Tutorial ONE text cost $29. Shareware registration
* for a tutorial is also $29.
*
* E-mail...
* Compuserve 71431,1337
* Internet doug.lowry%canrem.com
* UUCP canrem!doug.lowry
* Others: doug.lowry@canrem.uucp
*
* FAX... 416 963-5677
*
* "Snail mail"... Douglas Lowry, Ph.D.
* Marpex Inc.
* 5334 Yonge Street, #1102
* North York, Ontario
* Canada M2N 6M2
*
* Related database consultation and preparation services are
* available through:
* Innotech Inc., 2001 Sheppard Avenue E., Suite #118,
* North York, Ontario Canada M2J 4Z7
* Tel. 416 492-3838 FAX 416 492-3843
*
* This program is free software; you may redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* (file 05LICENS) along with this program; if not, write to the
* Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
* USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <dos.h>
#include <ctype.h>
#include <direct.h>
/* Size in bytes of the file read buffer declared in process(). */
#define BIGBUF 2048
/* Endless loop; the loop body is expected to leave it with break. */
#define repeat for(;;)
/* Two-valued flag type used for the "ascii", "need_data" and */
/* "good_key" flags in this file. */
typedef enum _bool
{ FALSE = 0, TRUE = 1 } Bool ;
/*
 * Forward declarations. The functions are declared without parameter
 * lists, matching the old-style definitions used in this file.
 */
void process( );
void Usage_( );
void line_out( );
unsigned long int xtol_n( );

/*
 * CMDNAME_ Returns the program name inserted into the usage text.
 */
char *
Cmdname_()
{
    return "a_pattrn" ;
}
/*
 * MAIN - Decode the command line, open the input file and pass the
 *        decoded key to process().
 *
 * usage: a_pattrn file_name key [ /x | -x ] [ bytes_before ] > report
 *
 * argv[1]  name of the file to scan; it is opened read-only in
 *          binary mode.
 * argv[2]  the key, 1 to 16 bytes after decoding. A backslash
 *          introduces one byte written as two hex digits; every
 *          other character is taken literally.
 * argv[3], argv[4]  optional, in either order: "/x" or "-x" (either
 *          case) selects the hex display; anything else is read as
 *          the decimal count of bytes to show before the key.
 *
 * Any invalid argument ends in Usage_(), which exits with status 1.
 * On success the program exits with status 0.
 */
int
main( argc, argv )
int argc;
char **argv;
{
    FILE *fp_in ;
    Bool ascii ;                /* ASCII display only requested  */
    int i ;
    long int requested ;        /* bytes_before as typed by user */
    unsigned long int l_test ;
    unsigned char key[16];      /* decoded key bytes             */
    short int precede,          /* bytes before key to be shown  */
              key_len,
              pt ;              /* position within argv[2]       */

    if( argc < 3 || argc > 5 )
        Usage_();

    /* The file is only read, so do not demand write access. */
    if(( fp_in = fopen( argv[1], "rb" )) == NULL )
    {
        fprintf( stderr, "Unable to open input file %s\n", argv[1] );
        Usage_() ;
    }

    pt = 0 ;
    for( key_len = 0 ; key_len < 16 ; key_len++ )
    {
        if( !argv[2][pt] )
            break ;
        if( argv[2][pt] != 0x5c )   /* not backslash = literal byte */
        {
            key[ key_len ] = ( unsigned char ) argv[2][pt] ;
            pt++ ;
        }
        else
        {
            /* xtol_n() rejects a terminating NUL as a non-hex    */
            /* character, so a truncated escape cannot run off    */
            /* the end of the argument.                            */
            l_test = xtol_n(( unsigned char * ) &argv[2][pt+1], 2 ) ;
            if( l_test > 0xff )
                Usage_();
            key[ key_len ] = ( unsigned char ) l_test ;
            pt += 3 ;
        }
    }

    /* An empty key would leave key[0] undefined. */
    if( key_len == 0 )
    {
        fprintf( stderr, "The key must contain at least 1 byte\n" );
        Usage_();
    }
    /* Refuse to search for a silently shortened key. */
    if( argv[2][pt] )
    {
        fprintf( stderr, "The key may not exceed 16 bytes\n" );
        Usage_();
    }

    precede = 3 ;
    ascii = TRUE ;
    for( i = 3 ; i < argc ; i++ )
    {
        /* Test the first character before looking at the second, */
        /* so an empty argument is never indexed past its NUL.     */
        if(( argv[i][0] == '/' || argv[i][0] == '-' )
            && toupper(( unsigned char ) argv[i][1] ) == 'X' )
            ascii = FALSE ;
        else
        {
            /* Clamp while still a long, so an oversized number   */
            /* cannot wrap when narrowed to short.                 */
            requested = strtol( argv[i], ( char ** ) NULL, 10 );
            if( requested > 16 - key_len )
                requested = 16 - key_len ;
            if( requested < 0 )
                requested = 0 ;
            precede = ( short int ) requested ;
        }
    }

    process( key, key_len, ascii, precede, fp_in );
    fclose( fp_in );
    exit( 0 );
    return( 0 );                /* not reached */
}
/*
 * USAGE_ Writes the usage summary to stderr and ends the program
 *        with exit status 1. It does not return to the caller.
 *
 *        The text is split over several fprintf() calls, each with
 *        a moderately sized string literal. Backslashes shown in
 *        the examples are written as "\\" so that they reach the
 *        output.
 */
void
Usage_( )
{
    fprintf( stderr,
        "usage: %s file_name key [ /x ] [ bytes_before ] > report\n"
        "\"/x\" = include hex, show only 16 bytes instead of 40\n"
        "List every occurrence of a key character or string in a file.\n"
        "Show 3 (or \"bytes_before\", range 0 to 15) bytes prior to the\n",
        Cmdname_() );
    fprintf( stderr,
        " key each time. Normally show a total of 40 bytes each time\n"
        "the key is found; if the \"/x\" argument is set, show only 16\n"
        "bytes, but in hex and ASCII both. The key may be from 1 to\n"
        "16 characters. Within the key, any non-printing characters,\n" );
    fprintf( stderr,
        " characters which may confuse DOS (> or < or |), linefeeds,\n"
        "blanks, backslash, etc. must be shown in hex form... a\n"
        "backslash and 2 hex digits. Examples:\n"
        "a_pattrn herfile \\8E > herfile.8e\n"
        "a_pattrn yourfile * 7 > yourfile.ast\n" ) ;
    fprintf( stderr,
        " a_pattrn myfile Mother\n"
        "a_pattrn hisfile \\94\\05ke\\ff 0 > 5char.pat\n\n"
        "input: Any file whatsoever.\n\n"
        "output: One line for each occurrence of the target byte(s) in the file.\n"
        "Sort the result to make patterns show up more clearly.\n\n"
        "writeup: MIR TUTORIAL ONE, topic 5\n" ) ;
    exit( 1 );
}
/*
 * XTOL_N Reads the first "bytes" characters of "string" as
 *        hexadecimal digits (either case) and returns their value
 *        as an unsigned long integer, most significant digit first.
 *
 *        Returns 0 when "bytes" is 0.
 *        Returns 0xffffffff when "bytes" is greater than 8, or when
 *        any character inside the count is not a hex digit; a
 *        terminating NUL counts as a non-hex character.
 */
unsigned long int
xtol_n( string, bytes )
unsigned char string[] ;
short int bytes ;
{
    unsigned long int value ;   /* result accumulated so far   */
    unsigned int nibble ;       /* value of the current digit  */
    unsigned char ch ;
    int idx ;

    if( bytes == 0 )
        return( 0 ) ;
    if( bytes > 8 )
        return( 0xffffffff );

    value = 0 ;
    idx = 0 ;
    while( idx < bytes )
    {
        ch = string[ idx++ ] ;
        if( !isxdigit( ch ))
            return( 0xffffffff );

        if( isdigit( ch ))
            nibble = ch - '0' ;
        else if( isupper( ch ))
            nibble = ch - 'A' + 10 ;
        else
            nibble = ch - 'a' + 10 ;

        /* make room for one more hex digit, then append it */
        value = ( value << 4 ) + nibble ;
    }
    return( value );
}
/*
* PROCESS - Passes through 1 file looking for key, outputting
* [preceding and] following characters when found.
*/
void
process( key, key_len, ascii, precede, fp_in )
unsigned char key[16]; /* characters requested by user */
short int key_len,
precede; /* bytes before key to be shown */
Bool ascii ; /* 40 bytes ASCII, no hex display*/
FILE *fp_in ;
{
unsigned char buffer[ BIGBUF ];
Bool need_data,
good_key ; /* found a match */
long int cum_byt; /* cumulative bytes into file */
int length, /* of buffer contents */
pt, /* current byte in buffer */
display, /* 16 or 40 bytes long */
adjust,
i, j, pt2 ;
cum_byt = adjust = 0;
need_data = TRUE ;
display = 16 ;
if( ascii )
display = 40 ;
repeat
{
if( need_data )
{
length = fread( &buffer[ adjust ], sizeof( char ),
( BIGBUF - adjust ), fp_in );
length += adjust;
if( !length )
break ;
if( adjust )
pt = precede;
else
pt = 0 ;
adjust = 0 ;
need_data = FALSE ;
/* After the end of last buffer in the file, */
/* reduce any trailing bytes to NULLs. */
if( length < BIGBUF )
{
for( i= length, j= 0 ; ( i < BIGBUF && j < display ) ;
i++, j++ )
buffer[i] = '\0' ;
}
}
if( length < BIGBUF && pt == length )
break ; /* Normal exit */
if( pt + display + 1 > length && length == BIGBUF )
{
for( adjust = 0, j = pt - precede ; j < length ; j++,
adjust++ )
buffer[ adjust ] = buffer[ j ] ;
need_data = TRUE ;
continue ;
}
if( buffer[pt] != key[0] )
{
pt++ ;
cum_byt++ ;
continue; /* the most typical action */
}
good_key = TRUE ;
for( pt2 = pt + 1, i = 1 ; i < key_len ; pt2++, i++ )
{
if( buffer[pt2] != key[i] )
{
good_key = FALSE ;
break ;
}
}
if( good_key )
line_out( cum_byt, &buffer[ pt - precede ], ascii ) ;
pt++ ;
cum_byt++ ;
}
return;
}
/*
 * LINE_OUT Writes one report line to stdout: the offset as an
 *          8-digit zero-padded decimal number and a colon, then
 *          (hex mode only) 16 bytes in two-digit hexadecimal, then
 *          the bytes as characters with a period standing in for
 *          each non-printable one. 16 characters are shown in hex
 *          mode, 40 when "ascii" is set.
 *
 *          If the closing newline cannot be written, a message is
 *          sent to stderr and the program exits with status 1.
 */
void
line_out( offset, buf, ascii )
long int offset ;
unsigned char *buf ;
Bool ascii ;
{
    int shown ;                 /* characters in the text column */
    int n ;
    unsigned char *cp ;

    shown = ascii ? 40 : 16 ;

    printf( "%08ld: ", offset );
    if( !ascii )
        for( n = 0 ; n < 16 ; n++ )
            printf( "%02x ", buf[ n ] );
    printf( " " );

    for( cp = buf ; cp < buf + shown ; cp++ )
        putchar( isprint( *cp ) ? *cp : '.' );

    /* Checking the newline once per line is enough to notice   */
    /* a failing output stream.                                  */
    if( putchar( '\n' ) != '\n' )
    {
        fprintf( stderr, "FATAL... Unable to write output.\n\n" );
        exit( 1 );
    }
    return ;
}